#########################################################################################################
# Data analysis: Expanding the State
#########################################################################################################
rm(list = ls())

options(repos="https://CRAN.R-project.org")

#install.packages('pacman')

pacman::p_load(dplyr, estimatr, tibble, grid, ggpubr, texreg, extrafont,
               ggplot2, xtable, stats, data.table, stargazer, 
               tidyverse, broom, purrr, modelsummary, rstudioapi,
               kableExtra, mice, car, haven, readxl, stringi, vtable)

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

#Read file that imports and cleans data
source("cleaning.R")

##Add option to format modelsummary tables in tex
options(modelsummary_format_numeric_latex = "plain")
options(modelsummary_factory_default = 'kableExtra')
options(modelsummary_factory_latex = 'kableExtra')
options(modelsummary_factory_html = 'kableExtra')

#lfe will use only one thread
oldopts <- options(lfe.threads=1)

##############
## Figure 2 ##
##############

#Purpose: Generate department-level number of movements to plot in Figure 2

movements <- read.csv("data/movements.csv")

table(movements$dept)

##############
## Figure 3 ##
##############

#Create bar chart of bureaucratic offices by year
#Note: The totals diverge very slightly from the data_ets data
#because 1) some of the totals are taken from the totals reported
#by the Peruvian government (a different source from the municipal-level data)
#and 2) a very few offices could not be confidently matched to municipalities.

sipa_off <- read.csv("data/bureaucratic_offices_by_year.csv")

png("figures/fig_3.png", units="in", type="cairo", width=7, height=5, res=500, family = "Times New Roman")

ggplot(sipa_off,aes(x = factor(year), y = off, fill = type))  +  
  geom_bar(stat = "identity")+ 
  scale_fill_grey(start = 0.55, end = 0.25,
                  name = "") + 
  labs(title= "", 
       x="Year",y="Total offices") +
  theme_bw()+ 
  theme(axis.text = element_text(size=16),
        axis.title = element_text(size=16),
        legend.text=element_text(size=16))

dev.off()

##############
## Figure 4 ##
##############
##Purpose: Perform parallel trends analysis

png("figures/fig_4.png", units="in", type="cairo", width=7, height=5, res=500, family = "Times New Roman")
ggplot(summary_data, aes(x = year, y = mean_outcome, color = as.factor(movement_cat))) +
  geom_point(size = 2) + 
  geom_line() +
  geom_vline(xintercept = 1963.5, linetype = "dashed", color = "black") +  
  labs(title = "",
       x = "",
       y = "Probability of SIPA office",
       color = "Movement") +
  theme_classic() +
  scale_color_manual(values = c("grey80","grey65", "grey45", "black")) +
  theme(legend.position = "bottom",                      
    legend.text = element_text(size = 16, color = "#0a0a0a"),                         
    legend.title = element_text(size = 16, color = "#0a0a0a"),   
    axis.text = element_text(size = 16, color = "#0a0a0a"),  
    axis.title = element_text(size = 16, color = "#0a0a0a"),  
    plot.title = element_text(size = 16, color = "#0a0a0a"),  
    strip.text = element_text(size = 16, color = "#0a0a0a"),
    axis.title.y = element_text(size = 16,
                                color = "#0a0a0a"),
    axis.title.x = element_text(size = 16,
                                color = "#0a0a0a")) 

dev.off()

#############
## Table 2 ##
#############

##Purpose: Analyze effects of movements on bureaucratic index
#Data: ets_final

dind_mod_index_nocontrols <- lm_robust(index ~ movement_6364,
                                       data = data_ets)

dind_mod_index_nocontrols_pre63 <- lm_robust(index ~ movement_6364 * 
                                  movement_pre63_dum, 
                                data = data_ets)

dind_mod_index_controlsfes <- lm_robust(index ~ movement_6364 + 
                                          elev_1k +teachers_pc +
                                          schools_pc + students_pc +
                                          ap_win_recode + ap_win_missing +
                                          roads_1964+
                                          total_pop_61 +
                                          rural_perc_61 + 
                                          log_hac + hac_size + 
                                          bureaucrat_binary, 
                                        fixed_effects = data_ets$prov,
                                        data = data_ets)

dind_mod_index_controlsfes_pre63 <- lm_robust(index ~ movement_6364 * 
                                          movement_pre63_dum + 
                                          elev_1k +teachers_pc +
                                          schools_pc + students_pc +
                                          ap_win_recode + ap_win_missing +
                                          roads_1964+
                                          total_pop_61 +
                                          rural_perc_61 + 
                                          log_hac + hac_size + 
                                          bureaucrat_binary, 
                                        fixed_effects = data_ets$prov,
                                        data = data_ets)


#Compose table for model summary
models_index_dind <- list("Differenced index (pre/post 1963)" =  
                            dind_mod_index_nocontrols,
                          "Differenced index (pre/post 1963)" =
                            dind_mod_index_nocontrols_pre63,
                          "Differenced index (pre/post 1963)" = 
                            dind_mod_index_controlsfes,
                          "Differenced index (pre/post 1963)" = 
                            dind_mod_index_controlsfes_pre63)

tab_2 <- modelsummary(models_index_dind, 
                          coef_map = c('movement_6364' = 
                                         "1963/1964 movement",
                                       'movement_pre63_dum' =
                                         "Pre-1963 movement",
                                       'movement_6364:movement_pre63_dum' =
                                         "1963/1964 movement x Pre-1963 movement",
                                       '(Intercept)' = 'Intercept'),
                          add_rows = data.frame(rbind(c("Fixed effects", "No", "No", "Yes","Yes"),
                                                      c("Controls", "No", "No", "Yes","Yes"))),
                          output = "latex", gof_omit = "IC",
                          col.names = NULL,
                          stars = c('†' = 0.1,
                                    '*' = 0.05,
                                    '**' = .01))

tab_2 <- tab_2  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "Differenced index (pre/post 1963)" = 4)) %>%
  add_header_above(c(" ", "Dependent variable:" = 4))

tab_2

save_kable(tab_2, "tables/tab_2.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)

#############
## Table 3 ##
#############
##Purpose: Analyze components of bureaucratic index

#Outcome is differenced SIPA office (pre/post-1963); no controls
dind_mod_sipa_fes <- lm_robust(sipa_pre_post_diff ~ 
                                           movement_6364,
                                      data = data_ets,
                                      fixed_effects = data_ets$prov)

#Outcome is differenced SIPA office (pre/post-1963); pre_63 movements + provincial fes
dind_mod_sipa_pre63_fes <- lm_robust(sipa_pre_post_diff ~ 
                                         movement_6364 * 
                                         movement_pre63_dum,
                                       fixed_effects = data_ets$prov,
                                       data = data_ets)


#Outcome is CP office in 1967; no controls
dind_mod_cp_fes <- lm_robust(central_cp_67 ~ movement_6364,
                                    fixed_effects = data_ets$prov,
                                    data = data_ets)

#Outcome is CP office in 1967; pre_63 movements + provincial fes
dind_mod_cp_pre63_fes <- lm_robust(central_cp_67 ~ movement_6364 * 
                                      movement_pre63_dum,
                                    fixed_effects = data_ets$prov,
                                    data = data_ets)

#Compose table for model summary
models_cp_sipa_dind <- list("SIPA offices (pre/post-1963)" =  
                              dind_mod_sipa_fes,
                            "SIPA offices (pre/post-1963)" =
                              dind_mod_sipa_pre63_fes,
                            "CP offices (1967)" = 
                              dind_mod_cp_fes,
                            "CP offices (1967)" = 
                              dind_mod_cp_pre63_fes)

tab_3 <- modelsummary(models_cp_sipa_dind, 
                            coef_map = c('movement_6364' = 
                                           "1963/1964 movement",
                                         'movement_pre63_dum' =
                                           "Pre-1963 movement",
                                         'movement_6364:movement_pre63_dum' =
                                           "1963/1964 movement x Pre-1963 movement",
                                         '(Intercept)' = 'Intercept'),
                            add_rows = data.frame(rbind(c("Fixed effects", "Yes", "Yes", "Yes","Yes"))),
                            output = "latex", gof_omit = "IC",
                            col.names = NULL,stars = c('†' = 0.1,
                                                      '*' = 0.05,
                                                      '**' = .01))

tab_3 <- tab_3  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "SIPA offices (pre/post-1963)" = 2, "CP offices (1967)" = 2)) %>%
  add_header_above(c(" ", "Dependent variable:" = 4))

tab_3

save_kable(tab_3, "tables/tab_3.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)

#############
## Table 4 ##
#############
#Purpose: Analyze relationship between independent/dependent 
#variables and long-term civil conflict, as measured by Shining
#Path control

#Peasant movement as predictor
mov_sl_nocontrols <- lm_robust(dummy_rebel ~ 
                                 movement_6364, 
                               data = data_ets)

mov_sl_nocontrols_fes <- lm_robust(dummy_rebel ~ 
                                     movement_6364, 
                                   fixed_effects = data_ets$prov,
                                   data = data_ets)

#Sipa (post-1963) as predictor
sipa_sl_nocontrols <- lm_robust(dummy_rebel  ~ 
                                  sipa_64_67, 
                                data = data_ets)

sipa_sl_nocontrols_fes <- lm_robust(dummy_rebel ~ 
                                      sipa_64_67, 
                                    fixed_effects = data_ets$prov,
                                    data = data_ets)

##CP offices in 1967 as predictor
cp_sl_nocontrols <- lm_robust(dummy_rebel  ~ 
                                central_cp_67, 
                              data = data_ets)

cp_sl_nocontrols_fes <- lm_robust(dummy_rebel ~ 
                                    central_cp_67, 
                                  fixed_effects = data_ets$prov,
                                  data = data_ets)


#Compose table for model summary
models_sl <- list("Movements (1963/1964)" =  
                    mov_sl_nocontrols,
                  "Movements (1963/1964)" =
                    mov_sl_nocontrols_fes,
                  "SIPA offices (1967)" = 
                    sipa_sl_nocontrols,
                  "SIPA offices (1967)" = 
                    sipa_sl_nocontrols_fes,
                  "CP offices (1967)" = 
                    cp_sl_nocontrols,
                  "CP offices (1967)" = 
                    cp_sl_nocontrols_fes)

tab_4 <- modelsummary(models_sl, 
                       coef_map = c('movement_6364' = 
                                      "1963/1964 movement",
                                    'sipa_64_67' =
                                      "SIPA (1964-1967)",
                                    'central_cp_67' =
                                      "CP office (1967)",
                                    '(Intercept)' = 'Intercept'),
                       add_rows = data.frame(rbind(c("Fixed effects", "No", "Yes", "No","Yes", "No","Yes"))),
                       output = "latex", gof_omit = "IC",
                       col.names = NULL,
                       stars = c('†' = 0.1,
                                 '*' = 0.05,
                                 '**' = .01))

tab_4 <- tab_4  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)","(5)", "(6)")) %>%
  add_header_above(c(" ", "Rebel control (binary)" = 6)) %>%
  add_header_above(c(" ", "Dependent variable:" = 6))


save_kable(tab_4, "tables/tab_4.tex", format = 'latex', keep_tex = TRUE, self_contained = TRUE)

###############
## Figure A1 ##
###############
#Purpose: Plot 1966 CP projects by type
cp_works_66 <- read.csv("data/cp_works_1966.csv")

png("figures/fig_a1.png", units="in", type="cairo", width=8, height=5, res=500, family = "Times New Roman")

ggplot(cp_works_66, aes(x = work, y = percent * 100)) +
  geom_bar(stat = "identity", fill = "black") +
  scale_y_continuous(
    labels = function(x) {
      #Format first value as percentage
      labels <- as.character(x)
      labels[which.max(x)] <- paste0(formatC(max(x), format = "f", digits = 0), "%")        
      return(labels)
    },
    limits = c(0, 100)
  ) +  
  labs(x = "",
       y = "Percent of total projects") +
  theme_classic(base_size = 14) +             
  theme(                      
        legend.text = element_text(size = 18, color = "#0a0a0a"),                         
        legend.title = element_text(size = 18, color = "#0a0a0a"),   
        axis.text = element_text(size = 18, color = "#0a0a0a"),  
        axis.title = element_text(size = 18, color = "#0a0a0a"),  
        plot.title = element_text(size = 18, color = "#0a0a0a"),  
        strip.text = element_text(size = 18, color = "#0a0a0a"),
        axis.title.y = element_text(size = 18,
                                    color = "#0a0a0a"),
        axis.title.x = element_text(size = 18,
                                    color = "#0a0a0a")
  )

dev.off()


###############
## Figure A2 ##
###############
#Purpose: Plot movements by year

movements_by_year <- movements %>%
  group_by(year) %>%
  summarise(count = n()) %>%
  arrange(year)

png("figures/fig_a2.png", units="in", type="cairo", width=8, height=5, res=500, family = "Times New Roman")

ggplot(movements_by_year, aes(x = factor(year), y = count)) +
  geom_bar(stat = "identity") +
  labs(
    x = "",
    y = "Number of Mobilizations",
    title = ""
  ) +
  theme_classic(base_size = 14) +
  theme(
    legend.text = element_text(size = 18, color = "#0a0a0a"),
    legend.title = element_text(size = 18, color = "#0a0a0a"),
    axis.text = element_text(size = 18, color = "#0a0a0a"),
    axis.title = element_text(size = 18, color = "#0a0a0a"),
    plot.title = element_text(size = 18, color = "#0a0a0a"),
    strip.text = element_text(size = 18, color = "#0a0a0a"),
    axis.title.y = element_text(size = 18, color = "#0a0a0a"),
    axis.title.x = element_text(size = 18, color = "#0a0a0a", margin = margin(t = 10))
  )
dev.off()

###############
## Figure A3 ##
###############

#Purpose: Plot strategies of mobilization by year
movements_63_64 <- movements %>%
  #Include only years 1963 and 1964
  filter(year %in% c(1963, 1964)) %>%
  dplyr::select(year, dplyr::starts_with("strat_")) %>%
  dplyr::select(-strat_Lima) %>%
  group_by(year) %>%
  summarise(across(everything(), sum, na.rm = TRUE)) %>%
  pivot_longer(cols = -year, names_to = "strategy", values_to = "count")%>%
  mutate(
    strategy_label = str_to_title(str_remove(strategy, "strat_"))
  )

png("figures/fig_a3.png", units="in", type="cairo", width=8, height=5, res=500, family = "Times New Roman")

ggplot(movements_63_64, aes(x = strategy_label, y = count)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Strategy Type",
    y = "Total Count (1963/1964)",
    title = ""
  ) +
  theme_classic(base_size = 14) +
  theme(
    legend.text = element_text(size = 18, color = "#0a0a0a"),
    legend.title = element_text(size = 18, color = "#0a0a0a"),
    axis.text = element_text(size = 18, color = "#0a0a0a"),
    axis.title = element_text(size = 18, color = "#0a0a0a"),
    plot.title = element_text(size = 18, color = "#0a0a0a"),
    strip.text = element_text(size = 18, color = "#0a0a0a"),
    axis.title.y = element_text(size = 18, color = "#0a0a0a"),
    axis.title.x = element_text(size = 18, color = "#0a0a0a", margin = margin(t = 10))
  )
dev.off()

###############
## Figure A4 ##
###############

#Purpose: Test common-support assumption

#Calculate p-scores
psm <- glm(movement_6364 ~ elev_1k +
             teachers_pc +
             schools_pc + students_pc +
             ap_win_recode + 
             roads_1964 +
             total_pop_61 +
             rural_perc_61 + 
             log_hac + hac_size + 
             bureaucrat_binary + sipa_1962 +
             movement_pre63_dum, 
           family = binomial(), 
           data = data_ets)

#Create range of common support
data_ets$pscores <- predict(psm, type = "response")


#Define the common support range (5th to 95th percentiles)
pscore_range <- quantile(data_ets$pscores, c(0.05, 0.95))


png("figures/fig_a4.png", units="in", type="cairo", width=7, height=5, res=500, 
    family = "Times New Roman")

ggplot(data_ets, aes(x = pscores, color = factor(movement_6364))) +
  geom_density(aes(y = after_stat(density)), linewidth = 1, bw = 0.05) +
  labs(title = "",
       x = "Propensity Score", y = "Density",
       color = "Movement 1963/1964") +
  geom_vline(xintercept = pscore_range[1], 
             linetype = "dashed", color = "black") +  
  geom_vline(xintercept = pscore_range[2], 
             linetype = "dashed", color = "black") +  
  scale_color_manual(values = c("gray20", "gray70"), 
                     labels = c("No", "Yes")) + 
  theme_classic() +
  theme(
    legend.position = "bottom", 
    legend.title = element_text(size = 14),  
    legend.text = element_text(size = 14),  
    axis.title = element_text(size = 14),  
    axis.text = element_text(size = 14))

dev.off()


###############
## Figure A5 ##
###############

#Purpose: Plot parallel trends with confidence intervals

png("figures/fig_a5.png", units="in", type="cairo", width=7, height=5, res=500, family = "Times New Roman")

ggplot(summary_data, aes(x = year, y = mean_outcome, color = as.factor(movement_cat))) +
  geom_point(size = 3, position = position_dodge(width = 0.2)) + 
  geom_vline(xintercept = 1963.5, linetype = "dashed", color = "red") + 
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.1, position = position_dodge(width = 0.2)) +
  geom_vline(xintercept = 1963.5, linetype = "dashed", color = "black") +  
  labs(title = "",
       x = "",
       y = "Probability of SIPA office",
       color = "Movement") +
  theme_classic() +
  scale_color_manual(values = c("grey80", "grey45", "black")) +
  theme(legend.position = "bottom",                      
        legend.text = element_text(size = 16, color = "#0a0a0a"),                         
        legend.title = element_text(size = 16, color = "#0a0a0a"),   
        axis.text = element_text(size = 16, color = "#0a0a0a"),  
        axis.title = element_text(size = 16, color = "#0a0a0a"),  
        plot.title = element_text(size = 16, color = "#0a0a0a"),  
        strip.text = element_text(size = 16, color = "#0a0a0a"),
        axis.title.y = element_text(size = 16,
                                    color = "#0a0a0a"),
        axis.title.x = element_text(size = 16,
                                    color = "#0a0a0a"))

dev.off()


##############
## Table A1 ##
##############
#Purpose: Provide summary statistics for main variables
#Mac users: Install XQuartz for the summarytools package to work.
#if using the most recent version of summarytools, use function dfSummary() instead
#of st()

taba1_summ_stats <- vtable::sumtable((data_ets %>% dplyr::select(c("movement_6364", 
                                "sipa_pre_post_diff",
                                "central_cp_67", 
                                "elev_1k", "teachers_pc",
                                "schools_pc", "students_pc",
                                "ap_win", "roads_1964",
                                "total_pop_61",
                                "rural_perc_61", 
                                "log_hac", "hac_size","sur_region",
                                "centro_region",
                                "north_region", 
                                "bureaucrat_binary"
))), 
labels = c("Movement in 1963/1964", "SIPA office diff.", "CP office",
           "Altitude", 
           "Teachers pc","Students pc", 
           "Schools pc", "AP mayor", "Road access",
           "Total population", "Rural population",
           "Haciendas (logged)", 
           "Hacienda size",  "Region: North",
           "Region: Center", "Region: South",
           "State presence"),
summ = c('mean(x)',  'median(x)',
         'sd(x)', 'min(x)', 'pctile(x)[25]', 
         'pctile(x)[75]', 'max(x)'),
summ.names = c("Mean", "Median", "Std. Dev.", 
               "Min", "25th perc.", 
               "75th perc.", "Max"),
out = "latex", file = "tables/tab_a1.tex")


##############
## Table A2 ##
##############
#Purpose: Calculate relationship between pre- and post-1963 movements

taba2_pre_post <- lm_robust(as.numeric(data_ets$movement_6364)~
                           data_ets$movement_pre63_dum)


#Compose table for model summary
tab_a2_mod <- list("Movement in 1963/1964" = taba2_pre_post)


tab_a2 <- modelsummary(tab_a2_mod, 
                                coef_map = c('data_ets$movement_pre63_dum' = 
                                               "Movement pre-1963",
                                             '(Intercept)' = 'Intercept'),
                                output = "latex", gof_omit = "IC",
                                col.names = NULL,
                                stars = c('†' = 0.1,
                                          '*' = 0.05,
                                          '**' = .01))



tab_a2 <- tab_a2  %>% column_spec(1,width = "1in") %>%
  column_spec(2,width = ".75in") %>%
  add_header_above(c("", "(1)")) %>%
  add_header_above(c(" ", "Movement in 1963/1964" = 1)) %>%
  add_header_above(c(" ", "Dependent variable: Land redistribution" = 2))


save_kable(tab_a2, "tables/tab_a2.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)



##############
## Table A3 ##
##############
#Purpose: Perform analysis in Table 2 with 1964 movements only

dind_mod_index_controlsfes_64 <- lm_robust(index ~ movement_1964 * 
                                             movement_pre63_dum + 
                                             elev_1k +teachers_pc +
                                             schools_pc + students_pc +
                                             ap_win_recode + 
                                             ap_win_missing + roads_1964 +
                                             total_pop_61 +
                                             rural_perc_61 + 
                                             log_hac + hac_size + 
                                             bureaucrat_binary, 
                                           fixed_effects = data_ets$prov,
                                           data = data_ets)

dind_mod_index_controls_64 <- lm_robust(index ~ movement_1964 * 
                                          movement_pre63_dum + 
                                          elev_1k +teachers_pc +
                                          schools_pc + students_pc +
                                          ap_win_recode + 
                                          ap_win_missing +
                                          roads_1964+
                                          total_pop_61 +
                                          rural_perc_61 + 
                                          log_hac + hac_size + 
                                          bureaucrat_binary +
                                          sur_region +
                                          centro_region +
                                          north_region,
                                        data = data_ets)

dind_mod_index_nocontrols_64 <- lm_robust(index ~ movement_1964 * 
                                            movement_pre63_dum,
                                          data = data_ets)

dind_mod_index_fes_64 <- lm_robust(index ~ movement_1964 * 
                                     movement_pre63_dum, 
                                   fixed_effects = data_ets$prov,
                                   data = data_ets)



#Compose table for model summary
models_index_dind_64 <- list("Differenced index (pre/post 1963)" =  
                               dind_mod_index_nocontrols_64,
                             "Differenced index (pre/post 1963)" =
                               dind_mod_index_controls_64,
                             "Differenced index (pre/post 1963)" = 
                               dind_mod_index_fes_64,
                             "Differenced index (pre/post 1963)" = 
                               dind_mod_index_controlsfes_64)

tab_a3 <- modelsummary(models_index_dind_64, 
                             coef_map = c('movement_1964' = 
                                            "1964 movement",
                                          'movement_pre63_dum' =
                                            "Pre-1963 movement",
                                          'movement_1964:movement_pre63_dum' =
                                            "1964 movement x Pre-1963 movement",
                                          '(Intercept)' = 'Intercept'),
                             add_rows = data.frame(rbind(c("Fixed effects", "No","No", "Yes", "Yes"),
                                                         c("Controls", "No", "Yes", "No", "Yes"))),
                             output = "latex", gof_omit = "IC",
                             col.names = NULL,
                             stars = c('†' = 0.1,
                                       '*' = 0.05,
                                       '**' = .01))

tab_a3 <- tab_a3  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "Differenced index (pre/post 1963)" = 4)) %>%
  add_header_above(c(" ", "Dependent variable:" = 4))


save_kable(tab_a3, "tables/tab_a3.tex", format = 'latex', keep_tex = TRUE, self_contained = TRUE)

##############
## Table A4 ##
##############
#Purpose: Perform analysis on non-differenced SIPA outcome

dind_mod_sipa_6467_controlsfes <- lm_robust(sipa_64_67 ~ 
                                              movement_6364 * 
                                              movement_pre63_dum + 
                                              elev_1k +teachers_pc +
                                              schools_pc + students_pc +
                                              ap_win_recode + 
                                              ap_win_missing +
                                              roads_1964+
                                              total_pop_61 +
                                              rural_perc_61 + 
                                              log_hac + hac_size + 
                                              bureaucrat_binary, 
                                          fixed_effects = data_ets$prov,
                                          data = data_ets)

dind_mod_sipa_6467_controls <- lm_robust(sipa_64_67 ~ movement_6364 * 
                                           movement_pre63_dum + 
                                           elev_1k +teachers_pc +
                                           schools_pc + students_pc +
                                           ap_win_recode + 
                                           ap_win_missing +
                                           roads_1964+
                                           total_pop_61 +
                                           rural_perc_61 + 
                                           log_hac + hac_size + 
                                           bureaucrat_binary +
                                           sur_region +
                                           centro_region +
                                           north_region,
                                       data = data_ets)

dind_mod_sipa_6467_nocontrols <- lm_robust(sipa_64_67 ~ movement_6364 *
                                          movement_pre63_dum,
                                         data = data_ets)


dind_mod_sipa_6467_fes <- lm_robust(sipa_64_67 ~ movement_6364 *
                                    movement_pre63_dum, 
                                  fixed_effects = data_ets$prov,
                                  data = data_ets)



#Compose table for model summary
models_sipa_6467_dind <- list("SIPA offices (post-1963)" =  
                              dind_mod_sipa_6467_nocontrols,
                            "SIPA offices (post-1963)" =
                              dind_mod_sipa_6467_controls,
                            "SIPA offices (post-1963)" = 
                              dind_mod_sipa_6467_fes,
                            "SIPA offices (post-1963)" = 
                              dind_mod_sipa_6467_controlsfes)

tab_a4 <- modelsummary(models_sipa_6467_dind, 
                            coef_map = c('movement_6364' = 
                                           "1963/1964 movement",
                                         'movement_pre63_dum' =
                                           "Pre-1963 movement",
                                         'movement_6364:movement_pre63_dum' =
                                           "1963/1964 movement x Pre-1963 movement",
                                         '(Intercept)' = 'Intercept'),
                            add_rows = data.frame(rbind(c("Fixed effects", "No", "No", "Yes","Yes"),
                                                        c("Controls", "No", "Yes", "No","Yes"))),
                            output = "latex", gof_omit = "IC",
                            col.names = NULL,
                       stars = c('†' = 0.1,
                                 '*' = 0.05,
                                 '**' = .01))

tab_a4 <- tab_a4  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "SIPA offices (1967)" = 4)) %>%
  add_header_above(c(" ", "Dependent variable:" = 4))

tab_a4

save_kable(tab_a4, "tables/tab_a4.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)

##############
## Table A5 ##
##############
#Purpose: Reproduce Table 2 using all movements as the independent variable

dind_mod_index_nocontrols_allmove <- lm_robust(index ~ any_move,
                                               data = data_ets)

dind_mod_index_fes_allmove <- lm_robust(index ~ any_move, 
                                        fixed_effects = data_ets$prov,
                                        data = data_ets)

dind_mod_cp_nocontrols_allmove <- lm_robust(central_cp_67 ~ any_move,
                                            data = data_ets)

dind_mod_cp_fes_allmove <- lm_robust(central_cp_67 ~ any_move, 
                                     fixed_effects = data_ets$prov,
                                     data = data_ets)

dind_mod_sipa_nocontrols_allmove <- lm_robust(sipa_pre_post_diff ~ any_move,
                                              data = data_ets)

dind_mod_sipa_fes_allmove <- lm_robust(sipa_pre_post_diff ~ any_move, 
                                       fixed_effects = data_ets$prov,
                                       data = data_ets)

#Compose table for model summary
models_index_dind_allmove <- list("Differenced index (pre/post 1963)" =  
                                    dind_mod_index_nocontrols_allmove,
                                  "Differenced index (pre/post 1963)" =
                                    dind_mod_index_fes_allmove,
                                  "CP (offices)" = 
                                    dind_mod_cp_nocontrols_allmove,
                                  "CP (offices)" = 
                                    dind_mod_cp_fes_allmove,
                                  "SIPA (offices)" =  
                                    dind_mod_sipa_nocontrols_allmove,
                                  "SIPA (offices)" =
                                    dind_mod_sipa_fes_allmove)

tab_a5 <- modelsummary(models_index_dind_allmove, 
                                  coef_map = c('any_move' = 
                                                 "Any movement (1956-1964)",
                                               '(Intercept)' = 'Intercept'),
                                  add_rows = data.frame(rbind(c("Fixed effects", "No", "Yes", "No","Yes", "No","Yes"))),
                                  output = "latex", gof_omit = "IC",
                                  col.names = NULL,
                                  stars = c('†' = 0.1,
                                            '*' = 0.05,
                                            '**' = .01))

tab_a5 <- tab_a5  %>% 
  column_spec(1,width = "1in") %>%
  column_spec(2:9,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)", "(5)", "(6)")) %>%
  add_header_above(c(" ", "Differenced index (pre/post 1963)" = 2, 
                     "CP (1967)" = 2,
                     "SIPA (pre/post-1963)" = 2)) %>%
  add_header_above(c(" ", "Dependent variable:" = 6))


save_kable(tab_a5, "tables/tab_a5.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)

##############
## Table A6 ##
##############

#Purpose: Calculate the effects for the subset of observations that 
#fall within common support

treated_density <- density(data_ets$pscores[data_ets$movement_6364 == 1])
control_density <- density(data_ets$pscores[data_ets$movement_6364 == 0])

#Get the minimum and maximum propensity scores for treated and control groups
min_pscore <- max(min(treated_density$x), min(control_density$x))
max_pscore <- min(max(treated_density$x), max(control_density$x))

#Create a common range for both densities 
common_range <- seq(min_pscore, max_pscore, length.out = 1000)

#Interpolate the densities to the common range
treated_interp <- approx(treated_density$x, treated_density$y, xout = common_range)$y
control_interp <- approx(control_density$x, control_density$y, xout = common_range)$y

#Clip negative densities to zero (density estimates might go negative)
treated_interp[treated_interp < 0] <- 0
control_interp[control_interp < 0] <- 0

#Calculate the overlapping area by taking the minimum of both densities at each point
min_density <- pmin(treated_interp, control_interp)

#Define common support dataset
data_common_support <- data_ets[
  data_ets$pscores >= pscore_range[1] & data_ets$pscores <= pscore_range[2], ]

#Re-run analysis from Table 2 on common support data
dind_mod_index_controlsfes_cs <- lm_robust(index ~ movement_6364 * 
                                             movement_pre63_dum + 
                                             elev_1k +teachers_pc +
                                             schools_pc + students_pc +
                                             ap_win_recode + ap_win_missing +
                                             roads_1964+
                                             total_pop_61 +
                                             rural_perc_61 + 
                                             log_hac + hac_size + 
                                             bureaucrat_binary +
                                             sur_region +
                                             centro_region +
                                             north_region, 
                                           fixed_effects = data_common_support$prov,
                                           data = data_common_support)

dind_mod_index_controls_cs <- lm_robust(index ~ movement_6364 * 
                                          movement_pre63_dum + 
                                          elev_1k +teachers_pc +
                                          schools_pc + students_pc +
                                          ap_win_recode + ap_win_missing +
                                          roads_1964+
                                          total_pop_61 +
                                          rural_perc_61 + 
                                          log_hac + hac_size + 
                                          bureaucrat_binary,
                                        data = data_common_support)

dind_mod_index_nocontrols_cs <- lm_robust(index ~ movement_6364 * 
                                            movement_pre63_dum,
                                          data = data_common_support)

dind_mod_index_fes_cs <- lm_robust(index ~ movement_6364 * 
                                     movement_pre63_dum, 
                                   fixed_effects = data_common_support$prov,
                                   data = data_common_support)

#Compose table for model summary
models_index_dind_cs <- list("Differenced index (pre/post 1963)" =  
                               dind_mod_index_nocontrols_cs,
                             "Differenced index (pre/post 1963)" =
                               dind_mod_index_controls_cs,
                             "Differenced index (pre/post 1963)" = 
                               dind_mod_index_fes_cs,
                             "Differenced index (pre/post 1963)" = 
                               dind_mod_index_controlsfes_cs)

tab_a6 <- modelsummary(models_index_dind_cs, 
                             coef_map = c('movement_6364' = 
                                            "1963/1964 movement",
                                          'movement_pre63_dum' =
                                            "Pre-1963 movement",
                                          'movement_6364:movement_pre63_dum' =
                                            "1963/1964 movement x Pre-1963 movement",
                                          '(Intercept)' = 'Intercept'),
                             add_rows = data.frame(rbind(c("Fixed effects", "No", "No", "Yes","Yes"),
                                                         c("Controls", "No", "Yes", "No","Yes"))),
                             output = "latex", gof_omit = "IC",
                             col.names = NULL,
                             stars = c('†' = 0.1,
                                       '*' = 0.05,
                                       '**' = .01))



tab_a6 <- tab_a6  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "Differenced index (pre/post 1963)" = 4)) %>%
  add_header_above(c(" ", "Dependent variable:" = 4))


save_kable(tab_a6, "tables/tab_a6.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)

#Find the % of observations that fall in the range of common support
#cited in paper
nrow(data_common_support)/nrow(data_ets)

##############
## Table A7 ##
##############
#Purpose: Replicate Table 2 removing potential post-treatment variables

dind_mod_index_controls_no_post <- lm_robust(index ~ movement_6364 * movement_pre63_dum + 
                                               elev_1k +teachers_pc +
                                               schools_pc + students_pc +
                                               total_pop_61 +
                                               rural_perc_61 + 
                                               log_hac + hac_size + 
                                               bureaucrat_binary +
                                               sur_region +
                                               centro_region +
                                               north_region, 
                                             data = data_ets)

dind_mod_index_fes_no_post <- lm_robust(index ~ movement_6364 * movement_pre63_dum+ 
                                          elev_1k +teachers_pc +
                                          schools_pc + students_pc +
                                          total_pop_61 +
                                          rural_perc_61 + 
                                          log_hac + hac_size + 
                                          bureaucrat_binary +
                                          sur_region +
                                          centro_region +
                                          north_region, 
                                        fixed_effects = data_ets$prov,
                                        data = data_ets)

dind_mod_cp_controls_no_post <- lm_robust(central_cp_67 ~ movement_6364 * movement_pre63_dum+ 
                                            elev_1k +teachers_pc +
                                            schools_pc + students_pc +
                                            total_pop_61 +
                                            rural_perc_61 + 
                                            log_hac + hac_size + 
                                            bureaucrat_binary +
                                            sur_region +
                                            centro_region +
                                            north_region, 
                                          data = data_ets)

dind_mod_cp_fes_no_post <- lm_robust(central_cp_67 ~ movement_6364 * movement_pre63_dum + 
                                       elev_1k +teachers_pc +
                                       schools_pc + students_pc +
                                       total_pop_61 +
                                       rural_perc_61 + 
                                       log_hac + hac_size + 
                                       bureaucrat_binary +
                                       sur_region +
                                       centro_region +
                                       north_region, 
                                     fixed_effects = data_ets$prov,
                                     data = data_ets)

dind_mod_sipa_controls_no_post <- lm_robust(sipa_pre_post_diff ~ movement_6364 * movement_pre63_dum+ 
                                              elev_1k +teachers_pc +
                                              schools_pc + students_pc +
                                              total_pop_61 +
                                              rural_perc_61 + 
                                              log_hac + hac_size + 
                                              bureaucrat_binary +
                                              sur_region +
                                              centro_region +
                                              north_region, 
                                            data = data_ets)

dind_mod_sipa_fes_no_post <- lm_robust(sipa_pre_post_diff ~ movement_6364 * movement_pre63_dum+ 
                                         elev_1k +teachers_pc +
                                         schools_pc + students_pc +
                                         total_pop_61 +
                                         rural_perc_61 + 
                                         log_hac + hac_size + 
                                         bureaucrat_binary +
                                         sur_region +
                                         centro_region +
                                         north_region, 
                                       fixed_effects = data_ets$prov,
                                       data = data_ets)

#Compose table for model summary
models_index_dind_no_post <- list("Differenced index (pre/post 1963)" =  
                                    dind_mod_index_controls_no_post,
                                  "Differenced index (pre/post 1963)" =
                                    dind_mod_index_fes_no_post,
                                  "CP (offices)" = 
                                    dind_mod_cp_controls_no_post,
                                  "CP (offices)" = 
                                    dind_mod_cp_fes_no_post,
                                  "SIPA (offices)" =  
                                    dind_mod_sipa_controls_no_post,
                                  "SIPA (offices)" =
                                    dind_mod_sipa_fes_no_post)

tab_a7 <- modelsummary(models_index_dind_no_post, 
                                  coef_map = c('movement_6364' = 
                                                 "1963/1964 movement",
                                               "movement_pre63_dum" =
                                                 "Pre-1963 movement",
                                               "movement_6364:movement_pre63_dum" =
                                                 "1963/1964 movement x Pre-1963 movement",
                                               '(Intercept)' = 'Intercept'),
                                  add_rows = data.frame(rbind(c("Fixed effects", "No", "Yes", "No","Yes", "No","Yes"))),
                                  output = "latex", gof_omit = "IC",
                                  col.names = NULL,
                                  stars = c('†' = 0.1,
                                            '*' = 0.05,
                                            '**' = .01))

tab_a7 <- tab_a7  %>% 
  column_spec(1,width = "1in") %>%
  column_spec(2:9,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)", "(5)", "(6)")) %>%
  add_header_above(c(" ", "Differenced index (pre/post 1963)" = 2, 
                     "SIPA (pre/post 1963)" = 2,
                     "CP (1967)" = 2)) %>%
  add_header_above(c(" ", "Dependent variable:" = 6))


save_kable(tab_a7, "tables/tab_a7.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)


##############
## Table A8 ##
##############
#Purpose: Test for relationship between 1967 CP office and 1983 CP office

mod_cp_83_67 <- lm_robust(central_cp_83 ~ central_cp_67, 
                                             data = data_ets)

mod_cp_83_67_fes <- lm_robust(central_cp_83 ~ central_cp_67, 
                          data = data_ets, fixed_effects = data_ets$prov)

#Compose table for model summary
model_cp_83_67 <- list("CP office (1983)" =  
                         mod_cp_83_67,
                       "CP office (1983)" =  
                         mod_cp_83_67_fes)

tab_a8 <- modelsummary(model_cp_83_67, 
                       coef_map = c('central_cp_67' = 
                                      "CP office (1967)",
                                    '(Intercept)' = 'Intercept'),
                       output = "latex", gof_omit = "IC",
                       col.names = NULL,
                       add_rows = data.frame(rbind(c("Fixed effects", "No", "Yes"))),
                                              stars = c('†' = 0.1,
                                 '*' = 0.05,
                                 '**' = .01))

tab_a8 <- tab_a8  %>% 
  column_spec(1,width = "1in") %>%
  column_spec(2:3,width = ".75in") %>%
  add_header_above(c("", "(1)", "(2)")) %>%
  add_header_above(c(" ", "CP office (1983)" = 2)) %>%
  add_header_above(c(" ", "Dependent variable:" = 2))


save_kable(tab_a8, "tables/tab_a8.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)


##############
## Table A9 ##
##############
#Purpose: Test of endurance of effects: Land reform (Albertus)
#Relationship between SIPA (1967) and later land reform

mod_pc_land <- lm_robust(as.numeric(data_ets$landredist_pc)~
                           data_ets$sipa_1967)

mod_pc_land_fes <- lm_robust(as.numeric(data_ets$landredist_pc)~
                           data_ets$sipa_1967, 
                           fixed_effects = data_ets$prov)

mod_dummy_land <- lm_robust(as.numeric(data_ets$land_binary) ~
                              data_ets$sipa_1967)

mod_dummy_land_fes <- lm_robust(as.numeric(data_ets$land_binary) ~
                              data_ets$sipa_1967, 
                              fixed_effects = data_ets$prov)

#Compose table for model summary
models_land_redist <- list("Land redistribution" = mod_pc_land, 
                           "Land redistribution" = mod_pc_land_fes, 
                           "Land redistribution" = mod_dummy_land,
                           "Land redistribution" = mod_dummy_land_fes)


tab_a9 <- modelsummary(models_land_redist, 
                             coef_map = c('data_ets$sipa_1967' = 
                                            "SIPA (1967)",
                                          '(Intercept)' = 'Intercept'),
                            output = "latex", gof_omit = "IC",
                             col.names = NULL,
                            add_rows = data.frame(rbind(c("Fixed effects", 
                                                          "No", "Yes", "No", "Yes"))),
                             stars = c('†' = 0.1,
                                       '*' = 0.05,
                                       '**' = .01))



tab_a9 <- tab_a9  %>% column_spec(1,width = "1in") %>%
  column_spec(2:5,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "Per capita" = 2, "Binary" = 2)) %>%
  add_header_above(c(" ", "Dependent variable: Land redistribution" = 4))


save_kable(tab_a9, "tables/tab_a9.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)


###############
## Table A10 ##
###############
#Purpose: Test relationship between independent/dependent
#variables and later *contested* control

#Peasant movements in 1963/1964
mov_sl_contest_nocontrols <- lm_robust(dummy_contested ~ 
                                         movement_6364, 
                                       data = data_ets)

mov_sl_contest_nocontrols_fes <- lm_robust(dummy_contested ~ 
                                             movement_6364, 
                                           fixed_effects = data_ets$prov,
                                           data = data_ets)

#SIPA in 1964-1967
sipa_sl_contest_nocontrols <- lm_robust(dummy_contested  ~ 
                                          sipa_64_67, 
                                        data = data_ets)

sipa_sl_contest_nocontrols_fes <- lm_robust(dummy_contested ~ 
                                              sipa_64_67, 
                                            fixed_effects = data_ets$prov,
                                            data = data_ets)

##CP offices in 1967

cp_sl_contest_nocontrols <- lm_robust(dummy_contested  ~ 
                                        central_cp_67, 
                                      data = data_ets)

cp_sl_contest_nocontrols_fes <- lm_robust(dummy_contested ~ 
                                            central_cp_67, 
                                          fixed_effects = data_ets$prov,
                                          data = data_ets)


#Compose table for model summary
models_sl_contest <- list("Movements (1963/1964)" =  
                            mov_sl_contest_nocontrols,
                          "Movements (1963/1964)" =
                            mov_sl_contest_nocontrols_fes,
                          "SIPA offices (1967)" = 
                            sipa_sl_contest_nocontrols,
                          "SIPA offices (1967)" = 
                            sipa_sl_contest_nocontrols_fes,
                          "CP offices (1967)" = 
                            cp_sl_contest_nocontrols,
                          "CP offices (1967)" = 
                            cp_sl_contest_nocontrols_fes)

tab_a10 <- modelsummary(models_sl_contest, 
                               coef_map = c('movement_6364' = 
                                              "1963/1964 movement",
                                            'sipa_64_67' =
                                              "SIPA (post-1963)",
                                            'central_cp_67' =
                                              "CP office (1967)",
                                            '(Intercept)' = 'Intercept'),
                               add_rows = data.frame(rbind(c("Fixed effects", "No", "Yes", "No","Yes", "No","Yes"))),
                               output = "latex", gof_omit = "IC",
                               col.names = NULL,
                               stars = c('†' = 0.1,
                                         '*' = 0.05,
                                         '**' = .01))

tab_a10 <- tab_a10  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)","(5)", "(6)")) %>%
  add_header_above(c(" ", "Contested control (binary)" = 6)) %>%
  add_header_above(c(" ", "Dependent variable:" = 6))


save_kable(tab_a10, "tables/tab_a10.tex", format = 'latex', keep_tex = TRUE, self_contained = TRUE)


###############
## Table A11 ##
###############

##Purpose: Analyze effects of movements on bureaucratic index, excluding SIPA offices in 1963
##to ensure that the outcome is clearly from before/after Belaunde

dind_mod_index_nocontrols_exc_63 <- lm_robust(index_excl_1963 ~ movement_6364,
                                       data = data_ets)

dind_mod_index_nocontrols_pre63_exc_63 <- lm_robust(index_excl_1963 ~ movement_6364 * 
                                               movement_pre63_dum, 
                                             data = data_ets)

dind_mod_index_controlsfes_exc_63 <- lm_robust(index_excl_1963 ~ movement_6364 + 
                                          elev_1k +teachers_pc +
                                          schools_pc + students_pc +
                                          ap_win_recode + ap_win_missing +
                                          roads_1964+
                                          total_pop_61 +
                                          rural_perc_61 + 
                                          log_hac + hac_size + 
                                          bureaucrat_binary, 
                                        fixed_effects = data_ets$prov,
                                        data = data_ets)

dind_mod_index_controlsfes_pre63_exc_63 <- lm_robust(index_excl_1963 ~ movement_6364 * 
                                                movement_pre63_dum + 
                                                elev_1k +teachers_pc +
                                                schools_pc + students_pc +
                                                ap_win_recode + ap_win_missing +
                                                roads_1964+
                                                total_pop_61 +
                                                rural_perc_61 + 
                                                log_hac + hac_size + 
                                                bureaucrat_binary, 
                                              fixed_effects = data_ets$prov,
                                              data = data_ets)


#Compose table for model summary
models_index_dind_exc_63 <- list("Differenced index (pre/post 1963)" =  
                            dind_mod_index_nocontrols_exc_63,
                          "Differenced index (pre/post 1963)" =
                            dind_mod_index_nocontrols_pre63_exc_63,
                          "Differenced index (pre/post 1963)" = 
                            dind_mod_index_controlsfes_exc_63,
                          "Differenced index (pre/post 1963)" = 
                            dind_mod_index_controlsfes_pre63_exc_63)

tab_a11 <- modelsummary(models_index_dind_exc_63, 
                      coef_map = c('movement_6364' = 
                                     "1963/1964 movement",
                                   'movement_pre63_dum' =
                                     "Pre-1963 movement",
                                   'movement_6364:movement_pre63_dum' =
                                     "1963/1964 movement x Pre-1963 movement",
                                   '(Intercept)' = 'Intercept'),
                      add_rows = data.frame(rbind(c("Fixed effects", "No", "No", "Yes","Yes"),
                                                  c("Controls", "No", "No", "Yes","Yes"))),
                      output = "latex", gof_omit = "IC",
                      col.names = NULL,
                      stars = c('†' = 0.1,
                                '*' = 0.05,
                                '**' = .01))

tab_a11 <- tab_a11  %>% column_spec(1,width = "1in") %>%
  column_spec(2:7,width = ".75in") %>%
  add_header_above(c("", "(1)","(2)", "(3)", "(4)")) %>%
  add_header_above(c(" ", "Differenced index (pre/post 1963)" = 4)) %>%
  add_header_above(c(" ", "Dependent variable:" = 4))

tab_a11

save_kable(tab_a11, "tables/tab_a11.tex", format = 'latex', 
           keep_tex = TRUE,
           self_contained = TRUE)
